/*
***********************************

Author: 	Paul Clist
Website: 	https://paulclist.github.io/
Email:		paul.clist@uea.ac.uk
Date:		March 2025

This code runs the analysis for "The Common Problem of Bad Controls in Tests of the Linguistic Savings Hypothesis"

It comprises:
1. An example
2. Investigating Ayres, I., Katz, T. K., and Regev, T. (2023). Languages and future-oriented economic
behavior—experimental evidence for causal effects. Proceedings of the National Academy of
Sciences, 120(7):e2208871120.

For 2 you will need to load the appropriate data. I provide a bit of code to 
set the global for where you've saved this. If you update it, then everything else should work. 

***********************************
*/



/*
***********************************
0. install packages, and set up directory
***********************************

If not already installed, install these packages by uncommenting the appropriate 
line. Only needs to be done once. 
*/

* ssc install estout, replace // helps display results in tables
* ssc install coefplot, replace // helps display results graphically
* net install speccurve, from("https://raw.githubusercontent.com/martin-andresen/speccurve/master") // specification curve
* ssc install center, replace // centre data

**** CHANGE THE PATH BELOW to the folder where you have saved the .dta files.
* Start from an empty session before anything is loaded.
clear all

* Single place where the data location is recorded.
global directory "/Users/paulclist/Library/CloudStorage/OneDrive-UniversityofEastAnglia/work/language bad controls/RSOS writing/data/"

* The data files are opened by bare file name further down, so make the
* data folder the working directory.
cd "${directory}"


/*
***********************************
1. an example 
***********************************
*/

* Simulated illustration of a bad control.
* `control' is generated FROM the treatment (FTR_strong), so it is a
* post-treatment variable. The outcome depends on the treatment directly
* (+1) and through the control (-0.5 * 2 = -1), so the total effect of the
* treatment is 0 while its coefficient conditional on the control is 1.
clear
est clear
set obs 100000
set seed 12345

* Treatment indicator: integer draw from {0,1}
gen FTR_strong    = runiformint(0,1)
gen noise_control = rnormal()
gen noise_outcome = rnormal()

* Variable names are written out in full (not abbreviated to FTR) so the
* code also runs under -set varabbrev off- and cannot become ambiguous if
* another variable starting with FTR is ever added.
gen control = noise_control + 2*FTR_strong
gen outcome = noise_outcome + 1*FTR_strong - 0.5*control

* est1: conditions on the post-treatment control
eststo: reg outcome FTR_strong control
* est2: treatment only
eststo: reg outcome FTR_strong
esttab
 
 
 
 /*
***********************************
2. Languages and future-oriented economic behavior—experimental evidence for causal effects
***********************************
*/
* this has four parts. Headings for subparts are indented. 
use "study1_data.dta", clear

	/*
	***********************************
	3a. Some tests summarised in text 
	Each command below is preceded by the sentence of the text it supports.
	***********************************
	*/

* "The average proficiency score in strong FTR languages (8.11)  is different from weak FTR languages (7.48) at the 1\% level ($t=9.57$, $N=1130$, unpaired test, $p<0.0000$)."
* Two-variable form of -ttest-, treating the two columns as independent samples.
ttest lang1_proficiency == lang2_proficiency, unpaired


* "There are large differences in the proficiency scores by language, with those responding in English getting much higher scores (an average of 8.5 out of 9, whereas the other languages all have averages between 7.3 and 8)."
* ibn. with no constant gives one coefficient per language category.
estimates clear
regress lang1_pro ibn.lang1_encode, noconstant
eststo
regress lang2_pro ibn.lang2_encode, noconstant
eststo
coefplot est1 est2, scale(2) ysize(3) ///
	legend(label(2 "Strong") label(4 "Weak") note("FTR"))


* "Around half of the sample speaks English, so a high proficiency score contains information on the likelihood the person speaks English."
tabulate lang1_encode


* "In a regression of being native on proficiency, $\hat{\beta}=-0.228$, $t=-3.33$ and $p=0.001$."
* Stack the two languages: every observation is duplicated, the original copy
* carries the language-1 values and the added copy the language-2 values.
* The stacked data are temporary; -restore- brings back the original dataset.
* NOTE(review): the quoted sentence names native as the dependent variable,
* whereas the regression below has prof as the dependent variable - confirm
* which direction is intended.
preserve
	estimates clear
	expand 2, generate(is_copy)
	generate prof   = cond(is_copy == 0, lang1_proficiency, lang2_proficiency)
	generate native = cond(is_copy == 0, native_1, native_2)
	regress prof native
	eststo
restore

	/*
	***********************************
	3b. Regression table 2
	With and without one control
	***********************************
	*/

** Two tobit models of reserve_numb on the treatment dummy, censored at 3.05
** (lower) and 7 (upper): first with proficiency_asked, then without it.
estimates clear

* est1: treatment + proficiency control
xi: tobit reserve_numb i.askedstrong proficiency_asked, ll(3.05) ul(7)
eststo

* est2: treatment only
xi: tobit reserve_numb i.askedstrong, ll(3.05) ul(7)
eststo

* Side-by-side table: coefficients to 2 d.p., standard errors, variable labels
esttab est1 est2, b(%9.2f) label se star(* .1 ** .05 *** .01)

	/*
	***********************************
	3c. Figure 3
	Which controls are bad? 
	***********************************
	*/

* Reload the data so that the in-place standardisation below starts from the
* original values.
use "study1_data.dta", clear
estimates clear


* One indicator variable per category of each categorical control
tabulate race,             generate(race_d)
tabulate genus_1,          generate(genus1_d)
tabulate genus_2,          generate(genus2_d)
tabulate lang_pair_encode, generate(pair_d)

* Full list of candidate controls (26 variables, in the order of the y-axis
* labels used in the graph below)
global controls proficiency_asked strong_weak_gap gender college ///
	race_d1 race_d2 race_d3 race_d4 race_d5 ///
	genus1_d1 genus1_d2 genus1_d3 ///
	genus2_d1 genus2_d2 ///
	pair_d1 pair_d2 pair_d3 pair_d4 pair_d5 pair_d6 ///
	pair_d7 pair_d8 pair_d9 pair_d10 pair_d11 pair_d12

* Each control in turn becomes the DEPENDENT variable in a regression on the
* treatment; the result is stored under the control's own name.
foreach depvar of global controls {
	center `depvar', inplace standardize
	quietly regress `depvar' askedstrong
	estimates store `depvar'
}

* One row per stored regression, showing the coefficient on the treatment
set scheme stcolor
coefplot *, drop(_cons) aseq swapnames nokey xline(0) ///
	ylab(1 "Proficiency" 2 "Strong-Weak Prof. Gap" 3 "Male" 4 "College" ///
	5 "White/Caucasian" 6 "African American" 7 "Hispanic" 8 "Asian" 9 "Other Race" ///
	10 "L1, Germanic" 11 "L1, Indic" 12 "L1, Romance" ///
	13 "L2, Germanic" 14 "L2, Indic" ///
	15 "English-Dutch" 16 "English-German" 17 "English-Mandarin" ///
	18 "French-Dutch" 19 "French-German" 20 "French-Mandarin" ///
	21 "Hindi-Dutch" 22 "Hindi-German" 23 "Hindi-Mandarin" ///
	24 "Spanish-Dutch" 25 "Spanish-German" 26 "Spanish-Mandarin") ///
	scale(1.2) xsize(8) ysize(5) ///
	xtitle("Beta, With Treatment as the Independent Variable") ///
	ytitle("Standardised Dependent Variables") ///
	xticks(-.5(.25)1)


	/*
	***********************************
	3d. Figure 4
	Specification curve
	***********************************
	*/

use "study1_data.dta", clear
estimates clear

* Candidate controls, and (in the same order) the name of the 0/1 scalar that
* records whether each one was included in a given specification.
local controls proficiency_asked strong_weak_gap gender college ///
	i.race_simple i.genus_1 i.genus_2 i.lang_pair_encode
local panels proficiency_asked strong_weak_gap gender college ///
	race genus1 genus2 lang_pair_encode

local K : word count `controls'
local last_run = 2^`K' - 1

* Every on/off combination of the controls is estimated once. The run number
* (0, 1, ..., last_run) is read as a K-digit binary number: the first control
* is the most significant digit and the last control the least significant,
* so the last control switches on and off fastest from one run to the next.
forvalues run = 0/`last_run' {

	* decode the run number and collect the controls that are switched on
	local rhs
	forvalues k = 1/`K' {
		local on`k' = mod(floor(`run' / 2^(`K' - `k')), 2)
		if `on`k'' {
			local term : word `k' of `controls'
			local rhs `rhs' `term'
		}
	}

	* actual regression
	qui: eststo: xi: tobit reserve_numb askedstrong `rhs', ll(3.05) ul(7)

	* record the run number and which controls were used
	qui: estadd scalar run_number = `run'
	forvalues k = 1/`K' {
		local flag : word `k' of `panels'
		qui: estadd scalar `flag' = `on`k''
	}
}

* Plot the treatment coefficient from every stored specification, with a
* panel below marking which controls each specification contains.
set scheme stcolor
speccurve est*, graphopts(legend(off)) ///
	param(askedstrong) ///
	panel(`panels') ///
	ytitle("Coefficient on Treatment")

* A tiny edit for looks, recorded in the author's graph editor. It will not
* exist on other machines, hence -cap-; edit the graph by hand instead.
cap graph play "making things blue"

exit



































